# Project: From Home Base to Swing States: The Evolution of Digital Advertising 
#          Strategies during the 2020 US Presidential Primary
# Authors: NaLette Brodnax and Piotr Sapiezynski

library(tidyverse)
library(broom)

# Load the monthly replication data, leaving out Richard Ojeda's rows.
ads <- filter(
  read_csv('replication_data_monthly.csv'),
  candidate != 'Richard Ojeda'
)

# Highest office held, one entry per candidate. The entries are attached to
# the candidate table by row position, so their order is significant.
highest_office <- c(
  'US Senator', 'None', 'US Senator', 'US Representative', 'US Senator',
  'US Senator', 'US Representative', 'Governor', 'US Senator',
  'US Representative', 'Vice President', 'Mayor', 'US Senator',
  'US Senator', 'None', 'US Senator', 'Mayor', 'US Senator', 'Mayor',
  'US Representative', 'Governor', 'US Representative', 'None',
  'US Representative', 'Mayor'
)

# One row per distinct candidate (ordered by the grouped count), with the
# corresponding highest_office entry attached by row position.
candidate_list <- ads %>%
  distinct(candidate) %>%
  count(candidate) %>%
  select(-n) %>%
  bind_cols(data.frame(office = highest_office))

# Fit by candidate (simple with no random slopes and no interactions)
#
# Args:
#   df:  Data frame with a `candidate` column plus the model variables
#        `budget_frac_per_resident`, `is_home`, `swing`, `feb` and `march`.
#   can: Candidate name; only rows whose `candidate` equals it are used.
#
# Returns:
#   The `lm` fit of budget_frac_per_resident ~ is_home + swing + feb + march
#   on that candidate's rows. Stops with an error if no rows match.
mod_simple <- function(df, can) {
  # Subset with base indexing so that `can` always refers to the argument;
  # inside a data-masking filter() a column of `df` named `can` would take
  # precedence. which() discards NA comparisons, matching filter().
  data <- df[which(df$candidate == can), , drop = FALSE]
  if (nrow(data) == 0) {
    stop("no rows found for candidate: ", can, call. = FALSE)
  }
  lm(budget_frac_per_resident ~ is_home + swing + feb + march,
     data = data)
}

# One fitted model per candidate, in the order of candidate_list$candidate.
all_fits <- lapply(candidate_list$candidate, mod_simple, df = ads)

# Convert results to dataframe: the `is_home` coefficient row of each fit,
# with the candidate name and highest office attached.
out <- Map(
  # Tag every coefficient table with its candidate before filtering, so the
  # rows are paired with candidates by name rather than by row position
  # (position only lines up if each fit yields exactly one `is_home` row).
  function(fit, who) mutate(tidy(fit), candidate = who),
  all_fits,
  candidate_list$candidate
) %>%
  bind_rows() %>%
  filter(term == 'is_home') %>%
  left_join(candidate_list, by = 'candidate')

# Plot by highest office: point estimate of the home effect per candidate
# with +/- one standard error bars, one facet per office.
ggplot(out, aes(as.factor(candidate), estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate + std.error),
                width = .1) +
  # Axis titles follow the aesthetics, not the screen position: after
  # coord_flip() the x aesthetic (candidate) is drawn on the vertical axis
  # and the y aesthetic (estimate) on the horizontal one.
  xlab('Candidate') +
  ylab('Home Estimate') +
  facet_wrap(office ~ ., scales = 'free_y', ncol = 1) +
  coord_flip()
